In [65]:
from sklearn.model_selection import train_test_split 
from sklearn.linear_model import LinearRegression
from sklearn import metrics
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import pandas as pd  
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly 
# Switch Plotly into its offline notebook mode once, right after the imports,
# i.e. before the Plotly figure further down in the notebook is built and shown.
plotly.offline.init_notebook_mode()
In [66]:
from sklearn import datasets
# Load scikit-learn's diabetes dataset as two separate objects:
# diabetes_X holds the feature matrix and diabetes_y the target that the
# regression below is trained to predict.
diabetes_X, diabetes_y = datasets.load_diabetes(return_X_y=True)
In [67]:
# Wrap the raw feature matrix in a DataFrame labelled with the dataset's own
# column names, then print it for a quick look at the values.
feature_names = datasets.load_diabetes().feature_names
df_diabetes = pd.DataFrame(diabetes_X, columns=feature_names)
print(df_diabetes)
          age       sex       bmi        bp        s1        s2        s3   
0    0.038076  0.050680  0.061696  0.021872 -0.044223 -0.034821 -0.043401  \
1   -0.001882 -0.044642 -0.051474 -0.026328 -0.008449 -0.019163  0.074412   
2    0.085299  0.050680  0.044451 -0.005670 -0.045599 -0.034194 -0.032356   
3   -0.089063 -0.044642 -0.011595 -0.036656  0.012191  0.024991 -0.036038   
4    0.005383 -0.044642 -0.036385  0.021872  0.003935  0.015596  0.008142   
..        ...       ...       ...       ...       ...       ...       ...   
437  0.041708  0.050680  0.019662  0.059744 -0.005697 -0.002566 -0.028674   
438 -0.005515  0.050680 -0.015906 -0.067642  0.049341  0.079165 -0.028674   
439  0.041708  0.050680 -0.015906  0.017293 -0.037344 -0.013840 -0.024993   
440 -0.045472 -0.044642  0.039062  0.001215  0.016318  0.015283 -0.028674   
441 -0.045472 -0.044642 -0.073030 -0.081413  0.083740  0.027809  0.173816   

           s4        s5        s6  
0   -0.002592  0.019907 -0.017646  
1   -0.039493 -0.068332 -0.092204  
2   -0.002592  0.002861 -0.025930  
3    0.034309  0.022688 -0.009362  
4   -0.002592 -0.031988 -0.046641  
..        ...       ...       ...  
437 -0.002592  0.031193  0.007207  
438  0.034309 -0.018114  0.044485  
439 -0.011080 -0.046883  0.015491  
440  0.026560  0.044529 -0.025930  
441 -0.039493 -0.004222  0.003064  

[442 rows x 10 columns]
In [68]:
# Single-feature linear regression: keep only column 2 of the feature matrix,
# which the frame printed above labels 'bmi'.
X = diabetes_X[:, 2]

# Hold out 20% of the samples for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    diabetes_y,
    test_size=0.2,
    random_state=42,
)

# The estimator is fitted on 2-D (n_samples, 1) inputs, so the 1-D BMI vectors
# are turned into single-column matrices.
X_re = X_train[:, None]
X_re2 = X_test[:, None]

# fit() returns the estimator itself, so construction and training can be chained.
model = LinearRegression().fit(X_re, y_train)

# Predictions on the held-out data (y_pred) and on the training data (y_pred2).
y_pred = model.predict(X_re2)
y_pred2 = model.predict(X_re)
In [69]:
# Seaborn rendering: training and test samples as scatter points, with the
# fitted regression line drawn across the test inputs.
ax = plt.gca()
sns.scatterplot(x=X_train, y=y_train, color='Blue', label='Training Data', ax=ax)
sns.scatterplot(x=X_test, y=y_test, color='Red', label='Test Data', ax=ax)
sns.lineplot(x=X_test, y=y_pred, color='Green', label='Linear Regression Model', ax=ax)

ax.set_xlabel('BMI')
ax.set_ylabel('Disease Progression')
ax.set_title('BMI vs Disease Progression')
ax.legend()
plt.show()
In [70]:
# Matplotlib rendering of the same comparison: training and test samples as
# scatter points, with the fitted regression line drawn across the test inputs.
plt.scatter(X_train, y_train, color='pink', label='Training Data')
plt.scatter(X_test, y_test, color='purple', label='Test Data')
plt.plot(X_test, y_pred, color='red', label='Linear Regression Model')
plt.xlabel('BMI')
plt.ylabel('Disease Progression')
# Same title as the seaborn and Plotly versions of this chart, so the figure
# is self-explanatory when viewed on its own.
plt.title('BMI vs Disease Progression')
plt.legend(loc='lower right')
plt.show()
In [71]:
# Plotly rendering of the same comparison. Every trace is created with its
# final styling; the regression line's grey colour is set directly on its trace.
train_trace = go.Scatter(
    x=X_train,
    y=y_train,
    mode='markers',
    name='Training Data',
    marker={'color': 'gold'},
)
test_trace = go.Scatter(
    x=X_test,
    y=y_test,
    mode='markers',
    name='Test Data',
    marker={'color': 'brown'},
)
fit_trace = go.Scatter(
    x=X_test,
    y=y_pred,
    mode='lines',
    name='Linear Regression Model',
    line={'color': 'grey'},
)

fig = go.Figure(data=[train_trace, test_trace, fit_trace])
fig.update_layout(
    title='BMI vs Disease Progression',
    xaxis_title='BMI',
    yaxis_title='Disease Progression',
)
fig.show()
In [72]:
# Error on held-out data versus error on the data the model was fitted to.
mse_test = mean_squared_error(y_test, y_pred)
mse_train = mean_squared_error(y_train, y_pred2)

# Parameters of the fitted line; the model has one feature, hence one coefficient.
slope = model.coef_[0]
intrcpt = model.intercept_

# Print each metric as "<label> <value>", one per line.
for label, value in (
    ("Mean Squared Error of Test data : ", mse_test),
    ("Mean Squared Error of Train data : ", mse_train),
    ("Slope : ", slope),
    ("Intercept : ", intrcpt),
):
    print(label, value)
Mean Squared Error of Test data :  4061.8259284949268
Mean Squared Error of Train data :  3854.11265207582
Slope :  998.5776891375593
Intercept :  152.00335421448167

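It is a bad fit: the mean squared error is large on both the training data (≈ 3854) and the test data (≈ 4062), i.e. an RMSE of roughly 62–64, so a straight line on BMI alone leaves much of the variation in disease progression unexplained.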